
library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)

##set the working directory for the replication archive
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

source(paste0(working_dir,"Auxiliary Scripts/RegressionRunner.R"))


##prepare the regressions by defining the names of the dependent variables
varnames <- c("AllOrgs","Putnam","pvote2012","nccs2014",
              "DRUGTOT", "MURDER", "RAPE", "GRNDTOT",
              "Mortality", "Premature mortality rate",
              "% without health insurance"
)

varnames_rev <- c("Num. Social Orgs","Num. Civic Orgs","Voter Turnout (2012)","Num. Nonprofits",
                  "Drug Arrest Rate", "Murder Arrest Rate", "Rape Arrest Rate", "All Arrest Rate",
                  "Mortality Rate", "Premature Mortality Rate",
                  "% without Health Insurance"
)

##we will vary the decade that we use for PO counts from 1850 to 1880

res <- NULL

decs <- seq(1850, 1890, by = 10)

for (d in decs){
  
  ##load in the main datasets
  if (d < 1890) all_together <- readRDS(paste0(working_dir,"Data/contemporary_social_capital_data_", d, "po.RDS"))
  if (d == 1890) all_together <- readRDS(paste0(working_dir,"Data/contemporary_social_capital_data_with_news.RDS"))
  
  ##run the regressions for both the pretreatment and post treatment cases
  pretreat_only <- as.data.frame(do.call(rbind, lapply(varnames, reg_runner_alt, covars = "pretreat", dataset = all_together)))
  all_vars <- as.data.frame(do.call(rbind, lapply(varnames, reg_runner_alt, covars = "all", dataset = all_together)))
  
  pretreat_only$Covariates <- "1890 Covariates Only"
  all_vars$Covariates <- "1890 Covariates + Contemporary Controls"
  
  ##bind together the results
  all_est <- rbind(pretreat_only, all_vars)
  all_est$DV <- as.factor(rep(varnames_rev, 2))
  
  
  ##make coefficient plots
  sc_plots_rev <-
    all_est %>%
    ggplot(aes(reorder(DV,Estimate), Estimate, colour = Covariates)) +
    geom_point(size = 3, position=position_dodge(width=0.5)) +
    geom_errorbar(
      aes(ymin = Estimate - 1.65*`Std. Error`, ymax = Estimate + 1.65*`Std. Error`),
      width = 0.01,
      linetype = "solid",
      position=position_dodge(width=0.5)) +
    xlab("") +
    ylab(paste0("Coefficient on Post Offices (logged, ",d,")"))+
    geom_hline(yintercept = 0, linetype = "dotted") +
    coord_flip() +
    theme_bw() +
    theme(legend.position = "bottom",
          axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 15),
          legend.text = element_text(size = 15),
          axis.title.x = element_text(size = 15))

  ggsave(sc_plots_rev,
         file = paste0(working_dir,"Replication Plots/social_capital_plots_using_decade_",d,".pdf"),
         width = 12, height = 4)

  all_est$decade <- d
  
  res <- rbind(res, all_est)
  
}

##Time to summarize results
res$Sign <- sign(res$Estimate)
res$`Statistically Significant` <- "No"
res$`Statistically Significant`[abs(res$Estimate/res$`Std. Error`) >= 1.64] <- "Yes"

sigPlot <-
  res %>%
  filter(Covariates != "1890 Covariates Only") %>%
  group_by(DV, Covariates) %>%
  mutate(numSig = sum(`Statistically Significant` == "Yes")) %>%
  ungroup() %>%
  mutate(DV = reorder(paste(DV, Covariates, sep = "__"), numSig))  %>%
  ggplot(., aes(decade, DV)) + 
  geom_tile(aes(fill = `Statistically Significant`), colour = "grey50") +
  facet_wrap(~ Covariates, scales = "free") +
  theme_bw() +
  xlab("Postal Decade Used") +
  ylab("") + 
  scale_y_discrete(labels = function(x) gsub("__.+$", "", x)) +
  theme(legend.position = "bottom",
        axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 15),
        legend.text = element_text(size = 15),
        axis.title.x = element_text(size = 15))

ggsave(sigPlot,
       file = paste0(working_dir,"Replication Plots/sigPlotVaryingDecade.pdf"),
       width = 12, height = 4)


